{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:04:21.070001200Z",
     "start_time": "2024-03-30T06:04:21.064998900Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import zipfile\n",
    "import javalang\n",
    "\n",
    "dataset_name = r\"gson\"\n",
    "BASE_DIR = r\"C:\\Users\\tianfy\\Desktop\\毕设\\数据集\\A3\"\n",
    "source_folder = r\"D:\\IDEA_Projects\\TestJavaCode\\src\\main\"\n",
    "JDK_SOURCE = r\"C:\\Users\\tianfy\\.jdks\\corretto-1.8.0_382\\src.zip\"\n",
    "EXPORT_DIR = fr\"C:\\Users\\tianfy\\Desktop\\毕设\\数据集\\A3\\{dataset_name}\"\n",
    "PROJECT_DIR = r\"D:/IDEA_Projects/TestJavaCode\"\n",
    "MAVEN_COMPILE_NAME = r\"compile.txt\"\n",
    "if not os.path.exists(EXPORT_DIR):\n",
    "    os.makedirs(EXPORT_DIR)\n",
    "FILE_PATH = fr\"{BASE_DIR}\\{dataset_name}.txt\"\n",
    "with open(FILE_PATH, \"r\", encoding=\"utf-8\") as f:\n",
    "    data = f.read()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "raw: 126\n",
      "filtered: 125\n"
     ]
    }
   ],
   "source": [
    "testcases = data.split(\"@Test\")\n",
    "print(f\"raw: {len(testcases)}\")\n",
    "testcases = [f\"@Test\\n{x}\" for x in testcases if x.strip() != \"\"]\n",
    "print(f\"filtered: {len(testcases)}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:04:51.917344Z",
     "start_time": "2024-03-30T06:04:51.913343Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "no syntex error: 58 (-67)\n"
     ]
    }
   ],
   "source": [
    "import javalang\n",
    "\n",
    "count = 0\n",
    "\n",
    "\n",
    "def is_syntax_error(testcase):\n",
    "    global count\n",
    "    try:\n",
    "        template = fr\"\"\"\n",
    "        public class Generated_{count}_Test{{\n",
    "            {testcase}\n",
    "        }}\n",
    "        \"\"\"\n",
    "        tree = javalang.parse.parse(template)\n",
    "        count += 1\n",
    "        return False\n",
    "    except:\n",
    "        return True\n",
    "\n",
    "_ = len(testcases)\n",
    "testcases = [x for x in testcases if not is_syntax_error(x)]\n",
    "print(f\"no syntex error: {len(testcases)} (-{_-len(testcases)})\")\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:04:52.498875900Z",
     "start_time": "2024-03-30T06:04:52.390775500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "outputs": [],
   "source": [
    "class_map = {}\n",
    "import os\n",
    "\n",
    "\n",
    "def update_class_ref(source_folder, class_map):\n",
    "    for root, dirs, files in os.walk(source_folder):\n",
    "        for file in files:\n",
    "            if file.endswith(\".java\"):\n",
    "                class_name = file[:-5]\n",
    "                root = root.replace(\"\\\\\", \".\")\n",
    "                root = root.replace(\"/\", \".\")\n",
    "                if \"src.main.java.\" not in root:\n",
    "                    continue\n",
    "                ref = root.split(\"src.main.java.\")[1]\n",
    "                if class_name not in class_map:\n",
    "                    class_map[class_name] = [ref]\n",
    "                else:\n",
    "                    class_map[class_name].append(ref)\n",
    "\n",
    "\n",
    "def update_class_ref_zip(source_zip, class_map):\n",
    "    with zipfile.ZipFile(source_zip, 'r') as z:\n",
    "        for filename in z.namelist():\n",
    "            if filename.endswith(\".java\"):\n",
    "                class_name = filename.split(\"/\")[-1][:-5]\n",
    "                root = os.path.dirname(filename).replace(\"/\", \".\")\n",
    "                if \"java.lang.\" in root:\n",
    "                    continue\n",
    "                if class_name not in class_map:\n",
    "                    class_map[class_name] = [root]\n",
    "                else:\n",
    "                    class_map[class_name].append(root)\n",
    "\n",
    "\n",
    "update_class_ref(source_folder, class_map)\n",
    "update_class_ref_zip(r\"C:\\Users\\tianfy\\.m2\\repository\\junit\\junit\\4.13.2\\junit-4.13.2-sources.jar\", class_map)\n",
    "update_class_ref_zip(JDK_SOURCE, class_map)\n",
    "\n",
    "\n",
    "def class_name_2_import(namelist: list[str]):\n",
    "    imports = [\n",
    "        \"import org.junit.Test;\",\n",
    "        \"import static org.junit.Assert.*;\",\n",
    "        \"import static org.hamcrest.CoreMatchers.*;\",\n",
    "\n",
    "    ]\n",
    "    for class_name in namelist:\n",
    "        imports.append(f\"import {class_map[class_name][0]}.{class_name};\")\n",
    "    imports = list(set(imports))\n",
    "    return \"\\n\".join(imports)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:05:18.288023900Z",
     "start_time": "2024-03-30T06:05:18.168273Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "outputs": [],
   "source": [
    "def get_testcase_imports(testcase):\n",
    "    tokens = list(javalang.tokenizer.tokenize(testcase))\n",
    "    identifier_tokens = [x.value for x in tokens if isinstance(x, javalang.tokenizer.Identifier)]\n",
    "    valid_identifier_tokens = [x for x in identifier_tokens if x in class_map]\n",
    "    return class_name_2_import(valid_identifier_tokens)\n",
    "\n",
    "\n",
    "for index, testcase in enumerate(testcases):\n",
    "    with open(fr\"{EXPORT_DIR}\\Generated_{index}_Test.java\", \"w\", encoding=\"utf-8\") as f:\n",
    "        template = f\"\"\"\n",
    "\n",
    "        {get_testcase_imports(testcase)}\n",
    "        public class Generated_{index}_Test{{\n",
    "            {testcase}\n",
    "        }}\n",
    "        \"\"\"\n",
    "        f.write(template.strip())\n",
    "_ = len(testcases)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:05:20.598565600Z",
     "start_time": "2024-03-30T06:05:20.563358300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "outputs": [],
   "source": [
    "compile_txt = open(fr\"{PROJECT_DIR}/{MAVEN_COMPILE_NAME}\", \"r\", encoding=\"utf16\").readlines()\n",
    "compile_txt = [x.strip() for x in compile_txt if x.startswith(\"[ERROR]\") and PROJECT_DIR in x]\n",
    "compile_txt = \"\\n\".join(compile_txt)\n",
    "pattern = f\"{PROJECT_DIR}/src/test/java/(.*?):\\[\"\n",
    "import re\n",
    "# search\n",
    "matches = re.findall(pattern, compile_txt)\n",
    "# remove duplicates\n",
    "matches = list(set(matches))\n",
    "# remove file\n",
    "for match in matches:\n",
    "    os.remove(fr\"{PROJECT_DIR}/src/test/java/{match}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:07:03.284387900Z",
     "start_time": "2024-03-30T06:07:03.266972300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 58\n"
     ]
    }
   ],
   "source": [
    "# 统计剩余的测试用例个数\n",
    "import os\n",
    "count = 0\n",
    "for root, dirs, files in os.walk(fr\"{PROJECT_DIR}/src/test/java\"):\n",
    "    for file in files:\n",
    "        if file.startswith(\"Generated_\") and file.endswith(\".java\"):\n",
    "            count += 1\n",
    "print(count, len(testcases) - count)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-30T06:07:11.622801500Z",
     "start_time": "2024-03-30T06:07:11.616290800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
